import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
%matplotlib inline
# Profiling Libraries
from ydata_profiling import ProfileReport
uc= pd.read_csv(r"C:\Users\acer\Desktop\4th Semester\DBMS Lab\SQL Unicorn Project\CSV Files\Unicorn_Companies.csv")
companies = pd.read_csv(r"C:\Users\acer\Desktop\4th Semester\DBMS Lab\SQL Unicorn Project\CSV Files\Companies.csv")
dates = pd.read_csv(r"C:\Users\acer\Desktop\4th Semester\DBMS Lab\SQL Unicorn Project\CSV Files\Dates.csv")
fundings = pd.read_csv(r"C:\Users\acer\Desktop\4th Semester\DBMS Lab\SQL Unicorn Project\CSV Files\Fundings.csv")
industry = pd.read_csv(r"C:\Users\acer\Desktop\4th Semester\DBMS Lab\SQL Unicorn Project\CSV Files\Industry.csv")
ProfileReport(uc, title="Unicorn Companies Profiling Report", explorative=True)
Summarize dataset: 0%| | 0/5 [00:00<?, ?it/s]
Generate report structure: 0%| | 0/1 [00:00<?, ?it/s]
Render HTML: 0%| | 0/1 [00:00<?, ?it/s]
uc.head()
| ID | Company | Date Joined | Industry | City | Country | Continent | Valuation | Funding | Select Investors | Year Founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Bytedance | 4/7/2017 | Artificial intelligence | Beijing | China | Asia | $180B | $8B | Sequoia Capital China, SIG Asia Investments, S... | 2012 |
| 1 | 2 | SpaceX | 12/1/2012 | Other | Hawthorne | United States | North America | $100B | $7B | Founders Fund, Draper Fisher Jurvetson, Rothen... | 2002 |
| 2 | 3 | SHEIN | 7/3/2018 | E-commerce & direct-to-consumer | Shenzhen | China | Asia | $100B | $2B | Tiger Global Management, Sequoia Capital China... | 2008 |
| 3 | 4 | Stripe | 1/23/2014 | Fintech | San Francisco | United States | North America | $95B | $2B | Khosla Ventures, LowercaseCapital, capitalG | 2010 |
| 4 | 5 | Klarna | 12/12/2011 | Fintech | Stockholm | Sweden | Europe | $46B | $4B | Institutional Venture Partners, Sequoia Capita... | 2005 |
uc.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1074 entries, 0 to 1073 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ID 1074 non-null int64 1 Company 1074 non-null object 2 Date Joined 1074 non-null object 3 Industry 1074 non-null object 4 City 1059 non-null object 5 Country 1074 non-null object 6 Continent 1074 non-null object 7 Year Founded 1074 non-null int64 8 Funding 1074 non-null object 9 Select Investors 1073 non-null object 10 Total Raised 1074 non-null object 11 Valuation 1074 non-null object dtypes: int64(2), object(10) memory usage: 100.8+ KB
uc.describe()
| ID | Year Founded | |
|---|---|---|
| count | 1074.000000 | 1074.000000 |
| mean | 537.500000 | 2013.231844 |
| std | 310.181399 | 4.129630 |
| min | 1.000000 | 2003.500000 |
| 25% | 269.250000 | 2011.000000 |
| 50% | 537.500000 | 2014.000000 |
| 75% | 805.750000 | 2016.000000 |
| max | 1074.000000 | 2021.000000 |
# Summary statistics (count / unique / top / freq) for the object-dtype columns
uc.describe(include=['object'])
| Company | Date Joined | Industry | City | Country | Continent | Funding | Select Investors | Total Raised | Valuation | |
|---|---|---|---|---|---|---|---|---|---|---|
| count | 1074 | 1074 | 1074 | 1074 | 1074 | 1074 | 1074 | 1074 | 1074 | 1074 |
| unique | 1073 | 639 | 16 | 258 | 46 | 6 | 539 | 1059 | 914 | 30 |
| top | Bolt | 7/13/2021 | Fintech | San Francisco | United States | North America | $1B | Sequoia Capital | None | $1B |
| freq | 2 | 9 | 224 | 152 | 562 | 589 | 60 | 3 | 24 | 471 |
uc.isnull().sum()
ID 0 Company 0 Date Joined 0 Industry 0 City 15 Country 0 Continent 0 Year Founded 0 Funding 0 Select Investors 1 Total Raised 0 Valuation 0 dtype: int64
uc.shape
(1074, 12)
data_dup = uc.duplicated().any()
print(data_dup)
False
# Number of distinct non-null values in every column.
# The mapping gets its own name: calling it `dict` would rebind the builtin
# and make every later `dict(...)` call in the session raise TypeError.
unique_counts = {column: uc[column].nunique() for column in uc.columns}
pd.DataFrame(unique_counts, index=["unique count"]).transpose()
| unique count | |
|---|---|
| ID | 1074 |
| Company | 1073 |
| Date Joined | 639 |
| Industry | 16 |
| City | 258 |
| Country | 46 |
| Continent | 6 |
| Year Founded | 35 |
| Funding | 539 |
| Select Investors | 1059 |
| Total Raised | 914 |
| Valuation | 30 |
def _is_continuous(column):
    """Return True when the column of `uc` has dtype int64 or float64."""
    dtype = uc[column].dtype
    return dtype == 'int64' or dtype == 'float64'


# Partition the column names: 64-bit numeric columns count as continuous,
# every other dtype as categorical. Column order is preserved in both lists.
continuous_values = [name for name in uc.columns if _is_continuous(name)]
categorical_values = [name for name in uc.columns if not _is_continuous(name)]
categorical_values
['Company', 'Date Joined', 'Industry', 'City', 'Country', 'Continent', 'Valuation', 'Funding', 'Select Investors']
continuous_values
['ID', 'Year Founded']
uc.Continent.unique()
array(['Asia', 'North America', 'Europe', 'Oceania', 'South America',
'Africa'], dtype=object)
uc.Country.unique().tolist()
['China', 'United States', 'Sweden', 'Australia', 'United Kingdom', 'Bahamas', 'India', 'Indonesia', 'Turkey', 'Estonia', 'Germany', 'Hong Kong', 'South Korea', 'Mexico', 'Canada', 'Netherlands', 'France', 'Finland', 'Israel', 'Lithuania', 'Denmark', 'Belgium', 'Colombia', 'Brazil', 'Singapore', 'Austria', 'Ireland', 'United Arab Emirates', 'Switzerland', 'Vietnam', 'South Africa', 'Thailand', 'Norway', 'Chile', 'Argentina', 'Bermuda', 'Japan', 'Spain', 'Malaysia', 'Senegal', 'Philippines', 'Luxembourg', 'Nigeria', 'Czech Republic', 'Croatia', 'Italy']
uc.Industry.unique().tolist()
['Artificial intelligence', 'Other', 'E-commerce & direct-to-consumer', 'Fintech', 'Internet software & services', 'Supply chain, logistics, & delivery', 'Consumer & retail', 'Data management & analytics', 'Edtech', 'Health', 'Hardware', 'Auto & transportation', 'Travel', 'Cybersecurity', 'Mobile & telecommunications', 'Artificial Intelligence']
# Convert the 'Date Joined' column from a 'string' to a 'Datetime'
uc['Date Joined'] = pd.to_datetime(uc['Date Joined'])
uc.dtypes
ID int64 Company object Date Joined datetime64[ns] Industry object City object Country object Continent object Valuation object Funding object Select Investors object Year Founded int64 dtype: object
# Replace the "Artificial intelligence" in **Industry** column with "Artificial Intelligence"
uc['Industry'] = uc['Industry'].replace('Artificial intelligence', 'Artificial Intelligence')
# Fill the missing values in 'City' with 'Unknown'.
# The result is assigned back rather than using fillna(inplace=True) on the
# selected column: the in-place call acts on an intermediate object and is
# not guaranteed to update `uc` itself.
uc['City'] = uc['City'].fillna('Unknown')
# fill the missing value in 'Select Investors' with 'Unknown'
uc['Select Investors'] = uc['Select Investors'].fillna('Unknown')
uc.isnull().sum()
ID 0 Company 0 Date Joined 0 Industry 0 City 0 Country 0 Continent 0 Valuation 0 Funding 0 Select Investors 0 Year Founded 0 dtype: int64
# Replace the string "Unknown" in Funding with "0"
uc['Funding'] = uc['Funding'].replace('Unknown', '0')
# Look for rows whose 'Funding' value is still the string 'Unknown' (none are expected after the replacement above)
uc[uc['Funding'] == 'Unknown'].head(3)
| ID | Company | Date Joined | Industry | City | Country | Continent | Valuation | Funding | Select Investors | Year Founded |
|---|
uc[uc['Funding'] == '0'].head(3)
| ID | Company | Date Joined | Industry | City | Country | Continent | Valuation | Funding | Select Investors | Year Founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 215 | 216 | SSENSE | 2021-06-08 | E-commerce & direct-to-consumer | Montreal | Canada | North America | $4B | 0 | Sequoia Capital | 2003 |
| 424 | 425 | Uplight | 2021-03-03 | Other | Boulder | United States | North America | $2B | 0 | Rubicon Technology Partners, Max Ventures, Inc... | 2019 |
| 567 | 568 | ISN | 2020-12-17 | Supply chain, logistics, & delivery | Dallas | United States | North America | $2B | 0 | Blackstone | 2001 |
selected_rows = uc.loc[uc['Country'] == 'Germany'].head(3)
selected_rows
| ID | Company | Date Joined | Industry | City | Country | Continent | Valuation | Funding | Select Investors | Year Founded | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 45 | 46 | Celonis | 2018-06-26 | Data management & analytics | Munich | Germany | Europe | $11B | $1B | Accel, 83North | 2011 |
| 67 | 68 | N26 | 2019-01-10 | Fintech | Berlin | Germany | Europe | $9B | $2B | Redalpine Venture Partners, Earlybird Venture ... | 2013 |
| 116 | 117 | Personio | 2021-01-19 | Internet software & services | Munich | Germany | Europe | $6B | $524M | Global Founders Capital, Nortzone Ventures, Pi... | 2015 |
def convert_valuation(valuation_str):
    """Convert a valuation string such as '$180B' or '$524M' to dollars.

    The last character is the scale ('B' for billions, 'M' for millions);
    everything before it, without the '$' sign, is the numeric part.

    Args:
        valuation_str: text of the form '$<number><B|M>'.

    Returns:
        The amount in dollars as a float.

    Raises:
        ValueError: if the string is empty, the scale is not 'B' or 'M',
            or the numeric part cannot be parsed as a number.
    """
    multipliers = {'B': 1e9, 'M': 1e6}
    # Drop the currency sign and surrounding whitespace before slicing
    cleaned = valuation_str.replace('$', '').strip()
    if not cleaned:
        raise ValueError('Empty valuation string')
    numeric_part, scale = cleaned[:-1], cleaned[-1]
    if scale not in multipliers:
        raise ValueError('Invalid scale: {}'.format(scale))
    return float(numeric_part) * multipliers[scale]
# Apply the custom function to the 'Valuation' column and create a new column 'Valuation Decimal'
uc['Valuation Decimal'] = uc['Valuation'].apply(convert_valuation)
uc.head(3)
| ID | Company | Date Joined | Industry | City | Country | Continent | Valuation | Funding | Select Investors | Year Founded | Valuation Decimal | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Bytedance | 2017-04-07 | Artificial Intelligence | Beijing | China | Asia | $180B | $8B | Sequoia Capital China, SIG Asia Investments, S... | 2012 | 1.800000e+11 |
| 1 | 2 | SpaceX | 2012-12-01 | Other | Hawthorne | United States | North America | $100B | $7B | Founders Fund, Draper Fisher Jurvetson, Rothen... | 2002 | 1.000000e+11 |
| 2 | 3 | SHEIN | 2018-07-03 | E-commerce & direct-to-consumer | Shenzhen | China | Asia | $100B | $2B | Tiger Global Management, Sequoia Capital China... | 2008 | 1.000000e+11 |
# Custom function that converts a funding string to a float amount in dollars
def convert_funding(funding_str):
    """Convert a funding string such as '$8B', '$524M' or '0' to dollars.

    The last character is the scale ('B' for billions, 'M' for millions);
    everything before it, without the '$' sign, is the numeric part. The bare
    value '0' (the placeholder this notebook substitutes for 'Unknown'
    funding) carries no scale and maps to 0.0.

    Args:
        funding_str: text of the form '$<number><B|M>', or '0'.

    Returns:
        The amount in dollars as a float.

    Raises:
        ValueError: if the string is empty, has no recognised scale (and is
            not the zero placeholder), or the numeric part cannot be parsed.
    """
    multipliers = {'B': 1e9, 'M': 1e6}
    # Drop the currency sign and surrounding whitespace before slicing
    cleaned = funding_str.replace('$', '').strip()
    if not cleaned:
        raise ValueError('Empty funding string')
    numeric_part, scale = cleaned[:-1], cleaned[-1]
    if scale in multipliers:
        return float(numeric_part) * multipliers[scale]
    # Only the exact zero placeholder is accepted without a scale; any other
    # unscaled text (e.g. '100') is rejected instead of silently becoming 0.
    if cleaned == '0':
        return 0.0
    raise ValueError('Invalid scale: {}'.format(scale))
# Apply the custom function to the "Funding" column and create a new column 'Funding Decimal'
uc['Funding Decimal'] = uc['Funding'].apply(convert_funding)
uc.head(3)
| ID | Company | Date Joined | Industry | City | Country | Continent | Valuation | Funding | Select Investors | Year Founded | Valuation Decimal | Funding Decimal | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Bytedance | 2017-04-07 | Artificial Intelligence | Beijing | China | Asia | $180B | $8B | Sequoia Capital China, SIG Asia Investments, S... | 2012 | 1.800000e+11 | 8.000000e+09 |
| 1 | 2 | SpaceX | 2012-12-01 | Other | Hawthorne | United States | North America | $100B | $7B | Founders Fund, Draper Fisher Jurvetson, Rothen... | 2002 | 1.000000e+11 | 7.000000e+09 |
| 2 | 3 | SHEIN | 2018-07-03 | E-commerce & direct-to-consumer | Shenzhen | China | Asia | $100B | $2B | Tiger Global Management, Sequoia Capital China... | 2008 | 1.000000e+11 | 2.000000e+09 |
uc.dtypes
ID int64 Company object Date Joined datetime64[ns] Industry object City object Country object Continent object Valuation object Funding object Select Investors object Year Founded int64 Valuation Decimal float64 Funding Decimal float64 dtype: object
uc.drop(columns= ['Valuation', 'Funding'], inplace= True)
uc.dtypes
ID int64 Company object Date Joined datetime64[ns] Industry object City object Country object Continent object Select Investors object Year Founded int64 Valuation Decimal float64 Funding Decimal float64 dtype: object
# Rename the 'Valuation Decimal' and 'Funding Decimal' back to 'Valuation' and 'Funding'
uc.rename(columns={'Valuation Decimal' : 'Valuation'}, inplace= True)
uc.dtypes
ID int64 Company object Date Joined datetime64[ns] Industry object City object Country object Continent object Select Investors object Year Founded int64 Valuation float64 Funding Decimal float64 dtype: object
uc.rename(columns={'Funding Decimal' : 'Funding'}, inplace= True)
uc.dtypes
ID int64 Company object Date Joined datetime64[ns] Industry object City object Country object Continent object Select Investors object Year Founded int64 Valuation float64 Funding float64 dtype: object
# Move the 'Valuation' column to the second position (index 1, right after 'ID')
column_names = list(uc.columns)
valuation_col1 = 'Valuation'
# Take the name out of its current slot (ValueError if the column is absent) ...
column_names.remove(valuation_col1)
# ... and put it back at index 1
column_names.insert(1, valuation_col1)
# Rebuild the frame with the new column order
uc = uc.reindex(columns=column_names)
uc.head(3)
| ID | Valuation | Company | Date Joined | Industry | City | Country | Continent | Select Investors | Year Founded | Funding | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 1.800000e+11 | Bytedance | 2017-04-07 | Artificial Intelligence | Beijing | China | Asia | Sequoia Capital China, SIG Asia Investments, S... | 2012 | 8.000000e+09 |
| 1 | 2 | 1.000000e+11 | SpaceX | 2012-12-01 | Other | Hawthorne | United States | North America | Founders Fund, Draper Fisher Jurvetson, Rothen... | 2002 | 7.000000e+09 |
| 2 | 3 | 1.000000e+11 | SHEIN | 2018-07-03 | E-commerce & direct-to-consumer | Shenzhen | China | Asia | Tiger Global Management, Sequoia Capital China... | 2008 | 2.000000e+09 |
# Recheck the data size
uc.size
11814
# Recheck the data size
uc.shape
(1074, 11)
# Recheck the data columns
uc.columns
Index(['ID', 'Valuation', 'Company', 'Date Joined', 'Industry', 'City',
'Country', 'Continent', 'Select Investors', 'Year Founded', 'Funding'],
dtype='object')
new_size = uc.groupby(['Continent', 'Country']).size()
new_size
Continent Country
Africa Nigeria 1
Senegal 1
South Africa 1
Asia China 173
Hong Kong 6
India 65
Indonesia 6
Israel 20
Japan 5
Malaysia 1
Philippines 2
Singapore 12
South Africa 1
South Korea 12
Thailand 2
United Arab Emirates 3
Vietnam 2
Europe Austria 2
Belgium 3
Croatia 1
Czech Republic 1
Denmark 2
Estonia 2
Finland 4
France 24
Germany 26
Ireland 5
Italy 1
Lithuania 1
Luxembourg 1
Netherlands 6
Norway 4
Spain 3
Sweden 6
Switzerland 5
Turkey 3
United Kingdom 43
North America Bahamas 1
Bermuda 1
Canada 19
Mexico 6
United States 562
Oceania Australia 8
South America Argentina 1
Brazil 16
Chile 2
Colombia 2
dtype: int64
uc.Continent.unique()
array(['Asia', 'North America', 'Europe', 'Oceania', 'South America',
'Africa'], dtype=object)
uc.Continent.value_counts()
North America 589 Asia 310 Europe 143 South America 21 Oceania 8 Africa 3 Name: Continent, dtype: int64
# Total valuation of all unicorn companies, shown in trillions of dollars
valuation_sum = uc['Valuation'].sum()
# The unit letter follows the number directly, e.g. '$3.711T'
new_valuation = '${:,.3f}T'.format(valuation_sum / 10**12)
new_valuation
'$3.711.T'
# Total Number of Unicorn Continents
continents = uc.Continent.nunique()
continents
6
# Total Number of Unicorn Countries
Countries = uc.Country.nunique()
Countries
46
# Total funding received by all unicorn companies, shown in billions of dollars
funding_sum = uc['Funding'].sum()
# The unit letter follows the number directly, e.g. '$591.820B'
new_funding = '${:,.3f}B'.format(funding_sum / 10**9)
new_funding
'$591.820.B'
# ---- Headline figures for the unicorn dataset ----
# Total valuation of all unicorns, shown in trillions of dollars (e.g. '$3.711T')
valuation_sum = uc['Valuation'].sum()
new_valuation = '${:,.3f}T'.format(valuation_sum / 10**12)
# Total number of unicorn companies: one row per company. Counting rows
# rather than distinct names keeps companies that happen to share a name.
# NOTE(review): this rebinds `companies`, which earlier held the
# Companies.csv DataFrame - confirm nothing later needs that frame.
companies = len(uc)
# Total number of unicorn industries
industries = uc.Industry.nunique()
# Total number of unicorn continents
continents = uc.Continent.nunique()
# Total number of unicorn countries
countries = uc.Country.nunique()
# Total number of unicorn cities
cities = uc.City.nunique()
# Total funding received by unicorns, shown in billions of dollars (e.g. '$591.820B')
funding_sum = uc['Funding'].sum()
new_funding = '${:,.3f}B'.format(funding_sum / 10**9)
# Print the summary
print('The Total Valuation of Unicorns is: ', new_valuation)
print('The Total No. of Unicorn Companies is: ', companies)
print('The Total No. of Unicorn Industries is: ', industries)
print('The Total No. of Unicorn Continents is: ', continents)
print('The Total No. of Unicorn Countries is: ', countries)
print('The Total No. of Unicorn Cities is: ', cities)
print('The Total Funding received by Unicorns is: ', new_funding)
The Total Valuation of Unicorns is: $3.711.T The Total No. of Unicorn Companies is: 1074 The Total No. of Unicorn Industries is: 15 The Total No. of Unicorn Continents is: 6 The Total No. of Unicorn Countries is: 46 The Total No. of Unicorn Cities is: 257 The Total Funding received by Unicorns is: $591.820.B
# Step 1: Find the maximum funding value
max_funding = uc['Funding'].max()
# Step 2: Locate the company associated with this maximum funding
max_funding_companies = uc[uc['Funding'] == max_funding]['Company'].tolist()
# Step 3: Convert the funding value to a scalable format
def format_funding(value):
    """Render a dollar amount as text with two decimals.

    Amounts of at least 1e9 are shown in billions, amounts of at least 1e6
    in millions, and anything smaller as the plain number.
    """
    for threshold, unit in ((1e9, "billion"), (1e6, "million")):
        if value >= threshold:
            return f"{value / threshold:.2f} {unit}"
    return f"{value:.2f}"
max_funding_formatted = format_funding(max_funding)
# Step 4: Print the results
for company in max_funding_companies:
print(f"Company: {company}, Funding: {max_funding_formatted}")
Company: JUUL Labs, Funding: 14.00 billion
# Step 1: Find the maximum valuation value
max_valuation = uc['Valuation'].max()
# Step 2: Locate the company associated with this maximum valuation
max_valuation_companies = uc[uc['Valuation'] == max_valuation]['Company'].tolist()
# Step 3: Convert the valuation value to a scalable format
def format_valuation(value):
    """Render a dollar amount as text with two decimals.

    Uses billions from 1e9 upwards, millions from 1e6 upwards, and the
    plain number below that.
    """
    in_billions = value >= 1e9
    in_millions = value >= 1e6
    if not (in_billions or in_millions):
        return f"{value:.2f}"
    divisor, unit = (1e9, "billion") if in_billions else (1e6, "million")
    return f"{value / divisor:.2f} {unit}"
max_valuation_formatted = format_valuation(max_valuation)
# Step 4: Print the results
for company in max_valuation_companies:
print(f"Company: {company}, Valuation: {max_valuation_formatted}")
Company: Bytedance, Valuation: 180.00 billion
import pandas as pd
import plotly.graph_objects as go
# Total valuation per industry (DataFrame indexed by industry name)
industry_total_val = uc[["Industry", "Valuation"]].groupby(by="Industry").sum()

if industry_total_val.empty:
    # Nothing to plot
    print("The DataFrame is empty. Please check the input data.")
else:
    # Colours handed to the pie trace
    colors = [
        '#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A',
        '#19D3F3', '#FF6692', '#B6E880', '#FF97FF', '#FECB52',
        # Add more colors if needed
    ]
    # Build the pie chart: one slice per industry, sized by total valuation
    fig3 = go.Figure()
    fig3.add_trace(go.Pie(
        labels=industry_total_val.index,
        values=industry_total_val["Valuation"],
        # Dict literal rather than dict(...), so the cell keeps working even
        # if the name `dict` has been rebound earlier in the session.
        marker={"colors": colors},
    ))
    fig3.update_layout(title='Industries distribution by total valuation')
    fig3.show()
import pandas as pd
import numpy as np
import plotly.graph_objects as go
# Keep only the rows that have a founding year
uc_with_fyear = uc[~uc['Year Founded'].isna()]


def _count_by_year(frame):
    """Return a ('Year Founded', 'Count') DataFrame for `frame`, sorted by year."""
    counts = frame["Year Founded"].value_counts().reset_index()
    counts.columns = ['Year Founded', 'Count']
    counts["Year Founded"] = counts["Year Founded"].astype(np.int64)
    # sort_values returns a new frame, so no in-place change on a slice
    return counts.sort_values(by=["Year Founded"])


# Unicorn counts per founding year across all industries, from 1990 onwards
num_by_founded_year = _count_by_year(uc_with_fyear)
num_by_founded_year = num_by_founded_year[num_by_founded_year["Year Founded"] >= 1990]
# Year axis shared by every dropdown entry
years = pd.DataFrame({"years": num_by_founded_year["Year Founded"]})

# Initial figure: the all-industries line
fig1 = go.Figure()
fig1.add_trace(go.Scatter(x=num_by_founded_year["Year Founded"],
                          y=num_by_founded_year["Count"],
                          mode='lines',
                          name='lines'))

# Dropdown entries are plain dict literals (not dict(...) calls), so the cell
# keeps working even if the name `dict` has been rebound earlier in the session.
# First entry: all industries together.
buttons = [{
    'method': 'update',
    'label': "All industries",
    'visible': True,
    'args': [{'y': [num_by_founded_year["Count"]],
              'x': [num_by_founded_year["Year Founded"]],
              'type': 'scatter'},
             {'title': "Number of unicorns in All industries since 1990"}],
}]

# One further entry per industry
for indst in uc_with_fyear.Industry.unique():
    temp_vc = _count_by_year(uc_with_fyear[uc_with_fyear["Industry"] == indst])
    # Align onto the shared year axis; years without a unicorn in this
    # industry come out of the left join as NaN and are set to 0.
    result = years.set_index('years').join(temp_vc.set_index('Year Founded'), how='left').fillna(0)
    result["Count"] = result["Count"].astype(int)
    buttons.append({
        'method': 'update',
        'label': indst,
        'visible': True,
        'args': [{'y': [result["Count"]],
                  'x': [years["years"]],
                  'type': 'scatter'},
                 {'title': f"Number of unicorns in {indst} since 1990"}],
    })

# Single dropdown menu holding all the buttons
updatemenu = [{'buttons': buttons, 'direction': 'down', 'showactive': True}]

# Attach the menu, then set the title and axis labels
fig1.update_layout(showlegend=False, updatemenus=updatemenu)
fig1.update_layout(
    title="Number of unicorns in All industries since 1990",
    xaxis_title='Founded Year',
    yaxis_title='Unicorns count')

# Show the figure
fig1.show()
# The Descriptive Statistics of the Numerical Columns
int_columns = uc['Year Founded'].describe().astype(int)
float_columns = uc[['Valuation','Funding']].describe().astype(float)
# Concatenate the summaries so that 'Year Founded' is shown as int while 'Valuation' and 'Funding' are shown as float
joined_columns = pd.concat([float_columns, int_columns], axis= 1)
joined_columns
| Valuation | Funding | Year Founded | |
|---|---|---|---|
| count | 1.074000e+03 | 1.074000e+03 | 1074 |
| mean | 3.455307e+09 | 5.510428e+08 | 2012 |
| std | 8.547022e+09 | 8.077194e+08 | 5 |
| min | 1.000000e+09 | 0.000000e+00 | 1919 |
| 25% | 1.000000e+09 | 2.180000e+08 | 2011 |
| 50% | 2.000000e+09 | 3.650000e+08 | 2014 |
| 75% | 3.000000e+09 | 6.030000e+08 | 2016 |
| max | 1.800000e+11 | 1.400000e+10 | 2021 |
Valuation:
Funding:
Year Founded:
Summary:
# Step 1: Identify the top 10 companies by their total valuation
top10_companies = uc.groupby('Company')['Valuation'].sum().sort_values(ascending=False).head(10).reset_index(name='Valuation')
# Step 2: Scale the valuations to billions
top10_companies['Valuation (in billions)'] = top10_companies['Valuation'] / 1e9
# Display the top 10 companies with scaled valuations
top10_companies[['Company', 'Valuation (in billions)']]
| Company | Valuation (in billions) | |
|---|---|---|
| 0 | Bytedance | 180.0 |
| 1 | SpaceX | 100.0 |
| 2 | SHEIN | 100.0 |
| 3 | Stripe | 95.0 |
| 4 | Klarna | 46.0 |
| 5 | Checkout.com | 40.0 |
| 6 | Canva | 40.0 |
| 7 | Instacart | 39.0 |
| 8 | JUUL Labs | 38.0 |
| 9 | Databricks | 38.0 |
# Canvas for the top-10 valuation chart
fig, ax = plt.subplots(figsize=(12, 8))
# Horizontal bars, one fixed colour per company
palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
sns.barplot(data=top10_companies, x="Valuation", y="Company", palette=palette, ax=ax)
# Write each valuation at the end of its bar: in billions from 1e9 upwards,
# in millions otherwise
for index, valuation in top10_companies["Valuation"].items():
    label = f"${valuation/1e9:.1f}B" if valuation >= 1e9 else f"${valuation/1e6:.1f}M"
    ax.text(valuation, index, label, ha="left", va="center", fontsize=10, color="black", fontweight='bold')
# Axis labels, title and a dashed vertical grid
ax.set(xlabel="Total Valuation (in USD)", ylabel="Company")
plt.title("Top 10 Most Valuable Unicorn Companies", fontsize=16, fontweight='bold')
plt.xticks(rotation=0)
plt.grid(axis='x', linestyle='--', alpha=0.7)
# Tighten the layout and render
plt.tight_layout()
plt.show()
top9_companyF = uc.groupby('Company')['Funding'].sum().sort_values(ascending=False).head(9).reset_index()
# Step 2: Scale the funding values to billions
top9_companyF['Funding (in billions)'] = top9_companyF['Funding'] / 1e9
# Display the top 9 companies with scaled funding values
top9_companyF[['Company', 'Funding (in billions)']]
| Company | Funding (in billions) | |
|---|---|---|
| 0 | JUUL Labs | 14.0 |
| 1 | Bytedance | 8.0 |
| 2 | Epic Games | 7.0 |
| 3 | SpaceX | 7.0 |
| 4 | Global Switch | 5.0 |
| 5 | Xingsheng Selected | 5.0 |
| 6 | Swiggy | 5.0 |
| 7 | J&T Express | 5.0 |
| 8 | BYJU's | 4.0 |
# Canvas for the top-9 funding chart
fig, ax = plt.subplots(figsize=(12, 8))
# Horizontal bars with one viridis colour per plotted company. The palette
# size comes from the data, so it cannot fall short of the number of bars.
palette = sns.color_palette("viridis", len(top9_companyF))
sns.barplot(x="Funding (in billions)", y="Company", data=top9_companyF, palette=palette, ax=ax)
# Add 'Funding' labels at the end of each bar
for index, row in top9_companyF.iterrows():
    funding = row["Funding (in billions)"]
    label = f"${funding:.1f}B"
    ax.text(funding, index, label, ha="left", va="center", fontsize=10, color="black", fontweight='bold')
# Customize the plot
ax.set(xlabel="Total Funding (in billions USD)", ylabel="Company")
plt.title("Top 9 Companies With The Highest Funding", fontsize=16, fontweight='bold')
plt.xticks(rotation=0)
plt.grid(axis='x', linestyle='--', alpha=0.7)
# Improve layout
plt.tight_layout()
# Show the plot
plt.show()
uc['ROI'] = (uc['Valuation'] - uc['Funding']) / uc['Funding']
# Step 2: Handle any NaN or infinite values
df = uc.replace([np.inf, -np.inf], np.nan).dropna(subset=['ROI'])
# Step 3: Identify the top companies by ROI
top10_roi = df.sort_values(by='ROI', ascending=False).head(10).reset_index()
# Step 4: Display the top 10 companies
top10_roi[['Company', 'Valuation', 'Funding', 'ROI']]
| Company | Valuation | Funding | ROI | |
|---|---|---|---|---|
| 0 | Zapier | 4.000000e+09 | 1000000.0 | 3999.000000 |
| 1 | Dunamu | 9.000000e+09 | 71000000.0 | 125.760563 |
| 2 | Workhuman | 1.000000e+09 | 9000000.0 | 110.111111 |
| 3 | CFGI | 2.000000e+09 | 19000000.0 | 104.263158 |
| 4 | Manner | 1.000000e+09 | 10000000.0 | 99.000000 |
| 5 | DJI Innovations | 8.000000e+09 | 105000000.0 | 75.190476 |
| 6 | GalaxySpace | 1.000000e+09 | 14000000.0 | 70.428571 |
| 7 | Canva | 4.000000e+10 | 572000000.0 | 68.930070 |
| 8 | Il Makiage | 2.000000e+09 | 29000000.0 | 67.965517 |
| 9 | Revolution Precrafted | 1.000000e+09 | 15000000.0 | 65.666667 |
# Step 5: Plot the top 10 companies by ROI
fig, ax = plt.subplots(figsize=(12, 8))
# Horizontal bars coloured from the "coolwarm" palette
palette = sns.color_palette("coolwarm", 10)
sns.barplot(data=top10_roi, x="ROI", y="Company", palette=palette, ax=ax)
# Write each ROI multiple at the end of its bar
for position, roi in zip(top10_roi.index, top10_roi["ROI"]):
    ax.text(roi, position, f"{roi:.2f}x", ha="left", va="center", fontsize=10, color="black", fontweight='bold')
# Axis labels, title and a dashed vertical grid
ax.set(xlabel="Return on Investment (ROI)", ylabel="Company")
plt.title("Top 10 Companies With The Highest ROI", fontsize=16, fontweight='bold')
plt.xticks(rotation=0)
plt.grid(axis='x', linestyle='--', alpha=0.7)
# Tighten the layout and render
plt.tight_layout()
plt.show()
# Step 1: Group by Industry and calculate the total valuation for each industry
industry_valuation = uc.groupby('Industry')['Valuation'].sum().sort_values(ascending=False).reset_index()
# Step 2: Select the top 5 industries by valuation
top5_industries = industry_valuation.head().copy()
# Step 3: Scale the valuation values to billions for better readability
top5_industries.loc[:, 'Valuation (in billions)'] = top5_industries['Valuation'] / 1e9
# Step 4: Display the top 5 industries with their valuation
top5_industries[['Industry', 'Valuation (in billions)']]
| Industry | Valuation (in billions) | |
|---|---|---|
| 0 | Fintech | 882.0 |
| 1 | Internet software & services | 595.0 |
| 2 | E-commerce & direct-to-consumer | 426.0 |
| 3 | Artificial Intelligence | 377.0 |
| 4 | Other | 252.0 |
# Step 4: Create the pie chart
fig, ax = plt.subplots(figsize=(10, 8))
# Create the pie chart with a custom color palette
colors = sns.color_palette("viridis", len(top5_industries))
wedges, texts, autotexts = ax.pie(
top5_industries['Valuation (in billions)'],
labels=top5_industries['Industry'],
colors=colors,
autopct='%1.1f%%',
startangle=140,
pctdistance=0.85,
wedgeprops={'linewidth': 1, 'edgecolor': 'white'}
)
# Customize the text properties
for text in texts:
text.set_color('black')
text.set_fontsize(12)
for autotext in autotexts:
autotext.set_color('white')
autotext.set_fontsize(10)
autotext.set_fontweight('bold')
# Draw a circle at the center of the pie to make it a donut chart
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
fig.gca().add_artist(centre_circle)
# Equal aspect ratio ensures that pie is drawn as a circle.
ax.axis('equal')
plt.title("Top 5 Unicorn Industries by Valuation", fontsize=16, fontweight='bold')
plt.show()
# The Top 5 Countries with the Most Number of Unicorns
top5_countries = uc.groupby('Country')['Company'].size().sort_values(ascending= False).head()
top5_countries
Country United States 562 China 173 India 65 United Kingdom 43 Germany 26 Name: Company, dtype: int64
# Top 5 countries by number of unicorn companies, computed from the data
# (not hard-coded sample figures), so the chart reflects the real counts.
top5_countries = uc.groupby('Country')['Company'].size().sort_values(ascending=False).head()
# Short display labels for the two longest country names
top5_countries = top5_countries.rename(index={'United States': 'USA', 'United Kingdom': 'UK'})
# Extract the data for the pie chart
countries = top5_countries.index
company_counts = top5_countries.values
# Pull every slice slightly away from the centre
explode = [0.05] * len(countries)
# Create the pie chart with percentage labels inside and country labels outside
plt.figure(figsize=(10, 8))
wedges, texts, autotexts = plt.pie(
    company_counts,
    labels=countries,
    autopct='%1.1f%%',
    pctdistance=0.85,  # position of the percentage labels inside the pie
    explode=explode,
    startangle=60,  # rotate the starting point of the first slice
    wedgeprops={'edgecolor': 'black', 'linewidth': 1},
)
# Style the outer (country) and inner (percentage) labels
for text, autotext in zip(texts, autotexts):
    text.set(color='black', fontsize=14, fontweight='bold')
    autotext.set(color='white', fontsize=12, fontweight='bold')
# Add a title
plt.title('Top 5 Countries With Most Number of Unicorn Companies', fontsize=16, fontweight='bold')
# Equal aspect ratio ensures that the pie is drawn as a circle
plt.axis('equal')
plt.show()
# The Top 5 Cities with the highest number of Unicorn Companies
top5_cities = uc.groupby(['City'])['Company'].size().sort_values(ascending=False).head(5)
top5_cities
City San Francisco 152 New York 103 Beijing 63 Shanghai 44 London 34 Name: Company, dtype: int64
# Bar plot of the Top 5 cities with the highest concentration of Unicorn Companies
sns.set_style("white")
# Plain list of bar colours. It is bound only to `custom_palette`; assigning
# it to `sns.color_palette` as well would overwrite seaborn's function and
# break every later `sns.color_palette(...)` call in the session.
custom_palette = ['#08306b', '#08519c', '#6F8FAF', '#0096FF', '#2171b5']
# A single figure; creating a second one beforehand would leave an empty figure behind
plt.figure(figsize=(12, 6))
sns.barplot(x='City', y='Company', data=top5_cities.reset_index(), palette=custom_palette, dodge=False)
# Write the number of companies above each bar
for i, count in enumerate(top5_cities):
    plt.text(x=i, y=count + 1, s=count, ha='center', fontsize=11, fontweight='bold')
plt.title('Top 5 Cities with the Most Concentration of Unicorn Companies', fontsize=15, fontweight='bold')
plt.xlabel('Unicorn Cities', fontsize=10, fontweight='bold')
plt.ylabel('No. of Unicorn Companies', fontsize=10, fontweight='bold')
plt.show()
<Figure size 1000x600 with 0 Axes>
# Unicorn Companies Distribution Across Continent
unicorn_cont = uc.groupby('Continent')['Company'].size().sort_values(ascending= False).head(10)
unicorn_cont
Continent North America 589 Asia 310 Europe 143 South America 21 Oceania 8 Africa 3 Name: Company, dtype: int64
# Continent labels and company counts taken from `unicorn_cont`
continents = unicorn_cont.index
company_counts = unicorn_cont.values
# Create a bar plot of the number of companies per continent
plt.figure(figsize=(10, 6))
sns.barplot(x=continents, y=company_counts, palette=['#08306b', '#0096FF', '#2171b5', '#08306b', '#08519c', '#6F8FAF', '#0096FF', '#2171b5', '#08306b', '#08519c'])
plt.title('Distribution of Unicorn Companies Across Continents')
plt.xlabel('Continent')
plt.ylabel('Number of Companies')
plt.show()
# Unicorn Companies Distribution across Industries
company_spread = uc.groupby('Industry')['Company'].size().sort_values(ascending= False).reset_index(name= 'Total Companies')
company_spread
| Industry | Total Companies | |
|---|---|---|
| 0 | Fintech | 224 |
| 1 | Internet software & services | 205 |
| 2 | E-commerce & direct-to-consumer | 111 |
| 3 | Artificial Intelligence | 84 |
| 4 | Health | 74 |
| 5 | Other | 58 |
| 6 | Supply chain, logistics, & delivery | 57 |
| 7 | Cybersecurity | 50 |
| 8 | Data management & analytics | 41 |
| 9 | Mobile & telecommunications | 38 |
| 10 | Hardware | 34 |
| 11 | Auto & transportation | 31 |
| 12 | Edtech | 28 |
| 13 | Consumer & retail | 25 |
| 14 | Travel | 14 |
# Create a bar plot
plt.figure(figsize=(10, 6))
sns.barplot(x='Total Companies', y='Industry', data=company_spread, palette='cubehelix', alpha=1.0)
plt.title('Unicorn Companies Distribution across Industries')
plt.xlabel('Total Companies')
plt.ylabel('Industry')
plt.show()
# The Top 10 Select Investors that have invested the highest funds in Unicorn Companies
def format_funding(funding):
    """Format a dollar amount as a short label such as '$1.5B' or '$2.0M'.

    Amounts of at least one billion are shown in billions and amounts of at
    least one million in millions, both with one decimal place. Anything
    smaller is shown as a whole-dollar figure.
    """
    if funding >= 1000000000:
        return f'${funding / 1000000000:.1f}B'
    if funding >= 1000000:
        return f'${funding / 1000000:.1f}M'
    # Whole dollars, matching the last definition of this helper in the file;
    # a bare f'${funding}' would leak float noise such as '$500000.0'.
    return f'${funding:.0f}'
# Sum funding per distinct 'Select Investors' value (the whole comma-separated
# list is the group key), keep the ten largest totals, then format them.
funding_by_investors = uc.groupby('Select Investors')['Funding'].sum()
top10_investors = funding_by_investors.sort_values(ascending=False).head(10)
top10_investors = top10_investors.apply(format_funding)
top10_investors
Select Investors Tiger Global Management $14.0B Sequoia Capital China, SIG Asia Investments, Sina Weibo, Softbank Group $8.0B Tencent Holdings, KKR, Smash Ventures $7.0B Founders Fund, Draper Fisher Jurvetson, Rothenberg Ventures $7.0B Aviation Industry Corporation of China, Essence Financial, Jiangsu Sha Steel Group $5.0B Accel India, SAIF Partners, Norwest Venture Partners $5.0B KKR, Tencent Holdings, Sequoia Capital China $5.0B Hillhouse Capital Management, Boyu Capital, Sequoia Capital China $5.0B Baidu Capital, Linear Venture, Tencent $4.0B Tencent Holdings, Warbug Pincus, IDG Capital $4.0B Name: Funding, dtype: object
# Step 1: Split every 'Select Investors' entry on ', ' and stack the pieces
# into one long Series with a single investor name per row
investor_columns = uc['Select Investors'].str.split(', ', expand=True)
investors_stacked = (
    investor_columns
    .stack()
    .reset_index(level=1, drop=True)
    .rename('Investors')
)
# Step 2: Count how often each investor appears and keep the ten most frequent
investor_counts = investors_stacked.value_counts().head(10)
investor_counts
Accel 60 Tiger Global Management 53 Andreessen Horowitz 53 Sequoia Capital China 48 Insight Partners 47 Sequoia Capital 47 Lightspeed Venture Partners 34 SoftBank Group 34 General Catalyst 33 Index Ventures 32 Name: Investors, dtype: int64
# Step 4: Pie chart of the ten most frequently listed investors
plt.figure(figsize=(8, 8))
slice_colors = plt.cm.Paired(range(len(investor_counts)))
plt.pie(
    investor_counts,
    labels=investor_counts.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=slice_colors,
)
plt.title('Top 10 Investors in Unicorn Companies', fontsize=16, fontweight='bold', y=1.1)
# End with plt.show() like the other plotting cells, so the figure is rendered
# explicitly and the Text(...) returned by plt.title is not echoed as output.
plt.show()
Text(0.5, 1.1, 'Top 10 Investors in Unicorn Companies')
def format_funding(funding):
    """Format a dollar amount as a short label such as '$1.5B' or '$2.0M'.

    Amounts of at least 1e9 are shown in billions and amounts of at least 1e6
    in millions, both with one decimal place. Anything smaller is shown as a
    whole-dollar figure.
    """
    if funding >= 1e9:
        return f'${funding / 1e9:.1f}B'
    if funding >= 1e6:
        return f'${funding / 1e6:.1f}M'
    # Whole dollars, matching the last definition of this helper in the file;
    # a bare f'${funding}' would leak float noise such as '$500000.0'.
    return f'${funding:.0f}'
# Total valuation per country; keep the ten countries with the largest totals
valuation_by_country = uc.groupby('Country')['Valuation'].sum()
top10_countries = valuation_by_country.sort_values(ascending=False).head(10)
# Turn the raw totals into short '$…B' / '$…M' labels
top10_countries_formatted = top10_countries.apply(format_funding)
# Show the formatted top-10 table
print(top10_countries_formatted)
Country United States $1933.0B China $696.0B India $196.0B United Kingdom $195.0B Germany $72.0B Sweden $63.0B Australia $56.0B France $55.0B Canada $49.0B South Korea $41.0B Name: Valuation, dtype: object
# Bar chart of the ten countries with the highest total unicorn valuation
country_bar_colors = ['#6F8FAF', '#0096FF', '#2171b5', '#08306b', '#08519c', '#6F8FAF', '#0096FF', '#2171b5', '#08306b', '#08519c']
plt.figure(figsize=(12, 6))
top10_countries.plot.bar(color=country_bar_colors)
plt.title(
    'Top 10 Countries with the Highest Unicorn Valuation',
    fontsize=16,
    fontweight='bold',
)
plt.xlabel('Country', fontsize=14)
plt.ylabel('Total Valuation (in USD)', fontsize=14)
plt.xticks(rotation=0, ha='right', fontsize=12)
plt.tight_layout()
plt.show()
# Unicorn Companies Trend By Year Joined
# Number of companies for each distinct 'Date Joined' value
companies_per_join_date = uc.groupby('Date Joined')['Company'].size()
total_companies = companies_per_join_date.reset_index(name='Total Companies')
total_companies
| Date Joined | Total Companies | |
|---|---|---|
| 0 | 2007-07-02 | 1 |
| 1 | 2011-04-02 | 1 |
| 2 | 2011-12-12 | 1 |
| 3 | 2012-02-13 | 1 |
| 4 | 2012-06-06 | 1 |
| ... | ... | ... |
| 634 | 2022-03-23 | 3 |
| 635 | 2022-03-29 | 2 |
| 636 | 2022-03-30 | 2 |
| 637 | 2022-03-31 | 1 |
| 638 | 2022-04-05 | 3 |
639 rows × 2 columns
# Parse 'Date Joined' as datetimes and derive the calendar year of joining
uc['Date Joined'] = pd.to_datetime(uc['Date Joined'])
uc['Year'] = uc['Date Joined'].dt.year
# Number of companies that joined in each year
yearly_sizes = uc.groupby('Year')['Company'].size()
total_companies_by_year = yearly_sizes.reset_index(name='Total Companies')
# Line chart of the yearly counts on a plain white background
plt.figure(figsize=(12, 6))  # Adjust the figure size as needed
sns.set_style("white")
ax = sns.lineplot(
    data=total_companies_by_year,
    x='Year',
    y='Total Companies',
    marker='o',
    color='darkred',
    label='Total No. of Unicorn Companies',
)
# Write each year's count just above its marker
label_style = dict(ha='center', va='bottom', fontsize=10, color='black', fontweight='bold')
for _, point in total_companies_by_year.iterrows():
    ax.text(point['Year'], point['Total Companies'], f'{point["Total Companies"]}', **label_style)
# Axis labels and title
ax.set_xlabel("Year Joined")
ax.set_ylabel("Total Companies")
ax.set_title("Unicorn Companies Trend By Year Joined", fontsize=13, fontweight='bold')
# No grid lines
ax.grid(False)
# Render the figure
plt.tight_layout()
plt.show()
# Distinct companies per founding year
founded_groups = uc.groupby('Year Founded')['Company']
companies_by_year = founded_groups.nunique().reset_index(name='Total Companies')
companies_by_year
| Year Founded | Total Companies | |
|---|---|---|
| 0 | 1919 | 1 |
| 1 | 1979 | 1 |
| 2 | 1984 | 1 |
| 3 | 1990 | 1 |
| 4 | 1991 | 1 |
| 5 | 1992 | 1 |
| 6 | 1993 | 1 |
| 7 | 1994 | 2 |
| 8 | 1995 | 2 |
| 9 | 1996 | 1 |
| 10 | 1997 | 1 |
| 11 | 1998 | 5 |
| 12 | 1999 | 8 |
| 13 | 2000 | 11 |
| 14 | 2001 | 9 |
| 15 | 2002 | 4 |
| 16 | 2003 | 8 |
| 17 | 2004 | 8 |
| 18 | 2005 | 14 |
| 19 | 2006 | 15 |
| 20 | 2007 | 24 |
| 21 | 2008 | 27 |
| 22 | 2009 | 34 |
| 23 | 2010 | 40 |
| 24 | 2011 | 82 |
| 25 | 2012 | 95 |
| 26 | 2013 | 87 |
| 27 | 2014 | 109 |
| 28 | 2015 | 155 |
| 29 | 2016 | 110 |
| 30 | 2017 | 74 |
| 31 | 2018 | 61 |
| 32 | 2019 | 45 |
| 33 | 2020 | 25 |
| 34 | 2021 | 11 |
# Multivariate
# Unicorn Companies Distribution across Cities, Countries and Continents
# Distinct companies for every (City, Country, Continent) combination
location_keys = ['City', 'Country', 'Continent']
top_cities = (
    uc.groupby(location_keys)['Company']
    .nunique()
    .reset_index(name='Total Companies')
)
top_cities.head(11)
| City | Country | Continent | Total Companies | |
|---|---|---|---|---|
| 0 | Aarhus | Denmark | Europe | 1 |
| 1 | Aberdeen | United Kingdom | Europe | 1 |
| 2 | Alameda | United States | North America | 1 |
| 3 | Alexandria | Australia | Oceania | 1 |
| 4 | Altrincham | United Kingdom | Europe | 1 |
| 5 | Ambler | United States | North America | 1 |
| 6 | Amsterdam | Netherlands | Europe | 6 |
| 7 | Andheri | India | Asia | 1 |
| 8 | Arlington | United States | North America | 1 |
| 9 | Atlanta | United States | North America | 7 |
| 10 | Austin | United States | North America | 8 |
import plotly.express as px
# Bubble chart: one marker per city, coloured by continent and sized by the
# number of companies in that city
scatter_options = dict(
    x='City',
    y='Total Companies',
    color='Continent',
    size='Total Companies',
    hover_name='City',
    title='Unicorn Companies Distribution across Cities, Countries, and Continents',
    labels={'Total Companies': 'Total Companies'},
)
fig = px.scatter(top_cities, **scatter_options)
# Render the figure
fig.show()
# Multivariate
# Unicorn Companies Valuation Spread over the Years
def format_funding(funding):
    """Format a dollar amount as a short label such as '$1.5B' or '$2.0M'.

    Amounts of at least one billion are shown in billions and amounts of at
    least one million in millions, both with one decimal place. Anything
    smaller is shown as a whole-dollar figure.
    """
    if funding >= 1000000000:
        return f'${funding / 1000000000:.1f}B'
    if funding >= 1000000:
        return f'${funding / 1000000:.1f}M'
    # Whole dollars, matching the last definition of this helper in the file;
    # a bare f'${funding}' would leak float noise such as '$500000.0'.
    return f'${funding:.0f}'
# Total valuation per 'Date Joined' value, smallest total first, then formatted
valuation_by_join_date = uc.groupby('Date Joined')['Valuation'].sum()
unicorn_val = valuation_by_join_date.sort_values(ascending=True)
unicorn_val = unicorn_val.apply(format_funding)
unicorn_val
Date Joined
2007-07-02 $1.0B
2018-04-10 $1.0B
2020-12-03 $1.0B
2020-11-12 $1.0B
2018-05-03 $1.0B
...
2021-07-20 $49.0B
2014-01-23 $95.0B
2012-12-01 $100.0B
2018-07-03 $100.0B
2017-04-07 $180.0B
Name: Valuation, Length: 639, dtype: object
# Helper that renders a dollar amount as a compact label
def format_funding(funding):
    """Return `funding` as '$x.xB', '$x.xM', or a whole-dollar '$x' string.

    The first unit whose threshold the amount reaches (billions, then
    millions) is used with one decimal place; smaller amounts are printed
    with no decimals.
    """
    for threshold, suffix in ((1e9, 'B'), (1e6, 'M')):
        if funding >= threshold:
            return f'${funding / threshold:.1f}{suffix}'
    return f'${funding:.0f}'
# Total valuation per industry; keep the five industries with the smallest totals
valuation_by_industry = uc.groupby('Industry')['Valuation'].sum()
least5_ind = valuation_by_industry.sort_values(ascending=True).head(5)
# Short '$…B' / '$…M' labels for display
least5_ind_formatted = least5_ind.apply(format_funding)
# Two-column frame (Industry, Valuation) holding the formatted labels
least5_ind_df = (
    least5_ind_formatted
    .rename_axis('Industry')
    .reset_index(name='Valuation')
)
least5_ind_df
| Industry | Valuation | |
|---|---|---|
| 0 | Travel | $46.0B |
| 1 | Mobile & telecommunications | $89.0B |
| 2 | Auto & transportation | $99.0B |
| 3 | Hardware | $99.0B |
| 4 | Edtech | $100.0B |
# Build the numeric frame used for plotting straight from the unformatted
# totals in `least5_ind`. Parsing the formatted labels back (e.g. '$46.0B')
# with pd.to_numeric(errors='coerce') turns every value into NaN because of
# the trailing unit letter, which left this frame empty.
least5_ind_df = (least5_ind / 1e9).reset_index()  # valuations in billions
least5_ind_df.columns = ['Industry', 'Valuation']
# Drop industries without a usable valuation (copy so the result is an
# independent frame rather than a view of the original)
least5_ind_df = least5_ind_df.dropna(subset=['Valuation']).copy()
# Display the resulting dataframe
print(least5_ind_df)
Empty DataFrame Columns: [Industry, Valuation] Index: []
import plotly.express as px
# Treemap of the industries in `least5_ind_df` (built from the five industries
# with the smallest total valuation), tile area and colour both = valuation
fig = px.treemap(
    least5_ind_df,
    path=['Industry'],
    values='Valuation',
    color='Valuation',
    color_continuous_scale='RdYlGn',  # Changed color scale to 'RdYlGn'
    # Five industries are selected upstream, so the title no longer says "Top 10"
    title='5 Industries with the Least Valuation'
)
# Update the layout for better appearance
fig.update_layout(
    title=dict(
        text='5 Industries with the Least Valuation',
        x=0.5,  # Center the title
        xanchor='center',
        font=dict(size=24, family='Arial', color='black')
    ),
    font=dict(size=14, family='Arial'),
    coloraxis_colorbar=dict(
        # Nested title dict replaces the deprecated `titleside` / `titlefont`
        # colorbar properties.
        title=dict(
            text='Valuation (in Billions)',
            side='right',
            font=dict(size=16, family='Arial')
        ),
        tickfont=dict(size=14, family='Arial')
    )
)
# Format the values in the treemap for better readability
fig.data[0].texttemplate = "%{label}<br>Valuation: $%{value:.2f}B"
# Show the treemap
fig.show()
# Group by 'Company', sum the 'Valuation', and keep the 7 largest totals
top7_companies = uc.groupby('Company')['Valuation'].sum().sort_values(ascending=False).head(7)
# Reset the index to create a new default integer-based index
top7_companies_reset = top7_companies.reset_index()
# Convert 'Valuation' to numeric format. The pattern is a raw string because
# '\$' inside a normal string literal is an invalid escape sequence.
top7_companies_reset['Valuation'] = pd.to_numeric(
    top7_companies_reset['Valuation'].replace(r'[\$,]', '', regex=True),
    errors='coerce'
)
# Drop rows with NaN values in 'Valuation'; copy so the assignment below works
# on an independent frame instead of a possible view (SettingWithCopyWarning)
top7_companies_reset = top7_companies_reset.dropna(subset=['Valuation']).copy()
# Convert valuations to billions
top7_companies_reset['Valuation'] = top7_companies_reset['Valuation'] / 1e9
# Create a treemap
fig = px.treemap(
    top7_companies_reset,
    path=['Company'],
    values='Valuation',
    color='Company',  # Use 'Company' column to assign different colors
    title='Top 7 Companies by Valuation'
)
# Update the layout for better appearance
fig.update_layout(
    title=dict(
        text='Top 7 Companies by Valuation',
        x=0.5,  # Center the title
        xanchor='center',
        font=dict(size=24, family='Arial', color='black')
    ),
    font=dict(size=14, family='Arial')
)
# Format the values in the treemap for better readability
fig.data[0].texttemplate = "%{label}<br>Valuation: $%{value:.2f}B"
# Show the treemap
fig.show()
from plotly.subplots import make_subplots
# Group by 'Company', sum the 'Valuation', and keep the 7 SMALLEST totals
# (ascending sort), i.e. the lowest-valued companies
top7_companies = uc.groupby('Company')['Valuation'].sum().sort_values(ascending=True).head(7)
# Reset the index to create a new default integer-based index
top7_companies_reset = top7_companies.reset_index()
# Convert 'Valuation' to numeric format. The pattern is a raw string because
# '\$' inside a normal string literal is an invalid escape sequence.
top7_companies_reset['Valuation'] = pd.to_numeric(
    top7_companies_reset['Valuation'].replace(r'[\$,]', '', regex=True),
    errors='coerce'
)
# Drop rows with NaN values in 'Valuation'; copy so the assignment below works
# on an independent frame instead of a possible view (SettingWithCopyWarning)
top7_companies_reset = top7_companies_reset.dropna(subset=['Valuation']).copy()
# Convert valuations to billions
top7_companies_reset['Valuation'] = top7_companies_reset['Valuation'] / 1e9
# Create a treemap. The title describes what is actually selected above: the
# seven lowest valuations, not the "Top 7".
# NOTE(review): if the highest valuations were intended here, switch the sort
# to ascending=False instead and restore the old title.
fig = px.treemap(
    top7_companies_reset,
    path=['Company'],
    values='Valuation',
    color='Company',  # Use 'Company' column to assign different colors
    title='7 Lowest-Valued Companies'
)
# Update the layout for better appearance
fig.update_layout(
    title=dict(
        text='7 Lowest-Valued Companies',
        x=0.5,  # Center the title
        xanchor='center',
        font=dict(size=24, family='Arial', color='black')
    ),
    font=dict(size=14, family='Arial')
)
# Format the values in the treemap for better readability
fig.data[0].texttemplate = "%{label}<br>Valuation: $%{value:.2f}B"
# Show the treemap
fig.show()
import pandas as pd
import plotly.express as px
# Make sure "Valuation" is numeric, in USD. When the column still holds text
# such as "$180B", a plain pd.to_numeric(errors='coerce') turns every row into
# NaN and the dropna below then empties `uc` ("No data available to plot.").
# Strip the currency symbol / thousands separators and apply the K/M/B/T
# suffix before converting. An already-numeric column is left untouched.
valuation = uc['Valuation']
if not pd.api.types.is_numeric_dtype(valuation):
    cleaned = valuation.astype(str).str.strip().str.replace(r'[\$,]', '', regex=True)
    magnitude = (
        cleaned.str[-1:].str.upper()
        .map({'K': 1e3, 'M': 1e6, 'B': 1e9, 'T': 1e12})
        .fillna(1.0)  # no recognised suffix: value is already in dollars
    )
    amount = pd.to_numeric(cleaned.str.replace(r'[KkMmBbTt]$', '', regex=True), errors='coerce')
    valuation = amount * magnitude
uc['Valuation'] = valuation
# Drop rows whose valuation could not be parsed; copy so the column assignment
# below does not act on a view (SettingWithCopyWarning)
uc = uc.dropna(subset=['Valuation']).copy()
# Ensure 'Investors Count' exists; if not, compute it as the number of
# comma-separated entries in 'Select Investors'
if 'Investors Count' not in uc.columns:
    uc['Investors Count'] = uc['Select Investors'].apply(lambda x: len(str(x).split(',')))
# Get the top 20 companies by valuation
top_20_companies = uc.nlargest(20, 'Valuation')
# Create the scatter plot using Plotly Express
fig4 = px.scatter(top_20_companies, x="Valuation", y="Funding",
                  size="Investors Count",
                  color="Industry",
                  hover_name="Company", size_max=60, title="Top 20 companies")
# Update the layout to include a legend title
fig4.update_layout(
    legend=dict(
        title="Industry"
    )
)
# Show the figure once, and only if there is data; otherwise print a message
if not top_20_companies.empty:
    fig4.show()
else:
    print("No data available to plot.")
No data available to plot.
import pycountry
import pandas as pd
import plotly.express as px
# Calculate the total valuation by country and keep the ten largest totals
top_10_countries = uc[["Country", "Valuation"]].groupby(by="Country").sum()
top_10_countries = top_10_countries.sort_values(by="Valuation", ascending=False)[:10].reset_index()
# Combined valuation of those ten countries and its share of the overall total
top_10_countries_total_valuation = top_10_countries["Valuation"].sum()
top_10_countries_total_valuation_perc = top_10_countries_total_valuation * 100 / uc["Valuation"].sum()
# ISO alpha-3 code for each country name, used as the map location key
top_10_countries["iso_code"] = top_10_countries["Country"].apply(lambda x: pycountry.countries.lookup(x).alpha_3)
# Create the choropleth map using Plotly Express.
# The total is divided by 1e9 in the title because it is labelled "B$", while
# the summed 'Valuation' values are raw dollar amounts (the file's
# format_funding helper divides the same column by 1e9 to show billions).
fig7 = px.choropleth(top_10_countries, locations="iso_code", color="Valuation",
                     hover_name="Country", color_continuous_scale='sunsetdark',
                     title=f"Valuation for top 10 countries is {top_10_countries_total_valuation / 1e9:.1f} B$ ({top_10_countries_total_valuation_perc:.1f}% of total)"
                     )
# Show the figure
fig7.show()